rm(list=ls())

library(DescTools)

# Set directories ---------------------------------------------------------

home_directory <- "[your home directory]"
output_directory <- file.path(home_directory)
master_scripts_directory<-file.path(home_directory, "DFAs_construction_code") 

setwd(master_scripts_directory)
source("[your home directory]/Gini_code/dfa_functions_for_gini.R")

# Set Parameters ----------------------------------------------------------

group <- "networth"  # Can run this for wealth and income splits (numerically-divided splits). 
agg_level <- "low"  # This program can be run on various aggregation levels, but "low" is preferred because it gives a more precise Gini measure.   
# May need to add additional aggregation levels weights below. 
method <- "fernandez" # Works with all tempdisagg methods. 
item <- "levels" # For wealth Gini, change this to "levels." For income Gini, change this to "income_levels." 

# load in DFA data 
setwd(output_directory)
fa_levels <- read_dfa(.item = item, .group = group, .agg_level = agg_level, .method = method) %>%
  mutate(date = as.yearqtr(date))

hhcount <- read_csv(file.path(output_directory, "hhcount", paste0("hhcount_", group, "_", agg_level, ".csv"))) %>%
  pivot_longer(-date, names_to = "cat", values_to = "hhcount") %>%
  mutate(date = as.yearqtr(date))

## First determine average incomes for each group 

if(agg_level == "high") {
  weights = c(1,9,40,50)
  b99_cat <- c("Next9", "Next40", "Bottom50")
  weights_b99 = c(9,40,50)
  b90_cat <- c("Next40", "Bottom50")
  weights_b90 = c(40,45)
}

if(agg_level == "low") {
  weights = c(1,4,5,rep(10,9))
  b99_cat <- c("pct95to99", "pct90to95", "pct80to90", "pct70to80", "pct60to70", "pct50to60", "pct40to50", "pct30to40", "pct20to30", "pct10to20", "Bottom10")
  weights_b99 = c(4,5,rep(10,9))
  b90_cat <- c("pct80to90", "pct70to80", "pct60to70", "pct50to60", "pct40to50", "pct30to40", "pct20to30", "pct10to20", "Bottom10")
  weights_b90 = c(rep(10,9))
}

fa_avgs <- fa_levels %>%
  select(date, cat, all_of(group)) %>%
  setNames(c("date", "cat", "value")) %>%
  left_join(hhcount, by = c("date", "cat")) %>%
  mutate(avg = value/hhcount) %>%
  select(date, cat, avg) %>%
  dplyr::group_by(date) %>%
  # the Gini function cannot process negative net worth
  dplyr::mutate(min = min(avg),
                avg = ifelse(avg <0, 0, avg)) %>%
  select(-min) %>%
  dplyr::ungroup() %>%
  # mutate(avg = ifelse(avg < 0, 0, avg)) %>%
  pivot_wider(names_from = date, values_from = avg)



gini <- numeric()
for (i in 2:length(fa_avgs)) {
  gini[i] <- Gini(x = fa_avgs[[i]], n = weights)
}

gini <- gini[!is.na(gini)]


fa_avgs_bottom99 <- fa_avgs %>%
  filter(cat %in% b99_cat) 

gini_bottom99 <- numeric()
for (i in 2:length(fa_avgs_bottom99)) {
  gini_bottom99[i] <- Gini(x = fa_avgs_bottom99[[i]], n = weights_b99)
}

gini_bottom99 <- gini_bottom99[-1]


fa_avgs_bottom90 <- fa_avgs %>%
  filter(cat %in% b90_cat) 

gini_bottom90 <- numeric()
for (i in 2:length(fa_avgs_bottom90)) {
  gini_bottom90[i] <- Gini(x = fa_avgs_bottom90[[i]], n = weights_b90)
}

gini_bottom90 <- gini_bottom90[-1]


gini <- data.frame(date = unique(fa_levels$date), gini = gini, gini_b99 = gini_bottom99, gini_b90 = gini_bottom90)


setwd("gini")
write_csv(gini, paste0("dfa_gini_", group, "_", agg_level, ".csv"))
